import requests
import re
from pymongo import *
import pandas

# Connection to the local MongoDB server. Scraped recipes are written to
# the "cook" collection of the "CaiPu" database.
client = MongoClient('mongodb://localhost:27017')
db = client['CaiPu']
cook = db['cook']


def getTitleUrl():
    """Fetch the DIY index page and extract its category links.

    The HTTP status code and the extracted list are printed as progress
    output.

    Returns:
        A list of ``(href, text)`` tuples, one for every
        ``<dd><a href="...">...</a></dd>`` entry matched in the page HTML.
    """
    index_page = requests.get('http://www.meishij.net/chufang/diy/')
    print(index_page.status_code)
    # No custom headers are sent; the request goes out with the defaults.
    link_pattern = re.compile(r'<dd><a href="(.*?)">(.*?)</a></dd>')
    links = link_pattern.findall(index_page.text)
    print(links)
    return links


def getCaiUrl(url, max_page=56):
    """Collect recipe links from every listing page of one category.

    Each page is requested as ``url + '?&page=' + str(n)`` for
    ``n = 1 .. max_page``. The page URL is always built from the unchanged
    base ``url`` so query strings do not pile up, and the matches of all
    pages are accumulated before returning (previously the function
    returned after the first page).

    Args:
        url: Base URL of the category listing.
        max_page: Number of the last listing page to request (inclusive).

    Returns:
        A list of ``(href, title)`` tuples for every recipe anchor with
        ``class="big"`` found across the requested pages.
    """
    # Compile once; the pattern does not change between pages.
    reg = re.compile(r'<a target="_blank" href="(.*?)" title="(.*?)" class="big">')
    CaiUrl = []
    for page in range(1, max_page + 1):
        page_url = url + '?&page=' + str(page)
        print(page_url)
        response = requests.get(page_url)
        found = reg.findall(response.text)
        print(found)
        CaiUrl.extend(found)
    return CaiUrl


def getCook(i):
    """Download one recipe page, extract its details and store them.

    Args:
        i: A ``(href, title)`` pair; ``i[0]`` is the recipe page URL and
            ``i[1]`` is stored as the recipe name.

    The page HTML is scanned with three regular expressions and the
    matches are saved as one document in the module-level ``cook``
    collection under the keys ``'name'``, ``'主料'`` (main ingredients),
    ``'配料'`` (side ingredients) and ``'做法'`` (method steps). Each group of
    matches is also printed as progress output.
    """
    response = requests.get(i[0])
    html = response.text

    # (text, amount) pairs where the <span> sits inside the <h4>.
    reg1 = re.compile(r'<h4><a .*? href=".*?">(.*?)</a><span>(.*?)</span></h4>')
    cook1 = reg1.findall(html)
    print(cook1)

    # (text, amount) pairs where the <span> follows the closing </h4>.
    reg2 = re.compile(r'<a .*? href=".*?">(.*?)</a></h4><span>(.*?)</span>')
    cook2 = reg2.findall(html)
    print(cook2)

    # (step label, description, image URL) triples; re.S lets ".*?" span
    # the newlines between the elements of a step.
    reg3 = re.compile(
        r'<em class="step" id=".*?">(.*?)</em>.*?<p>(.*?)</p>.*?'
        r'<img class="conimg" src="(.*?)" alt=""></p>',
        re.S,
    )
    images = reg3.findall(html)
    print(images)

    # Collection.insert() was deprecated in PyMongo 3.0 and removed in 4.0;
    # insert_one() is the supported way to store a single document.
    cook.insert_one({'name': i[1], '主料': cook1, '配料': cook2, '做法': images})


def main():
    """Crawl every category and store each recipe found in it.

    For each category link returned by ``getTitleUrl`` the recipe links
    are fetched with ``getCaiUrl`` and every recipe is passed to
    ``getCook``. Category URLs and recipe entries are printed as progress
    output.
    """
    for category in getTitleUrl():
        category_href = category[0]
        print(category_href)
        for recipe in getCaiUrl(category_href):
            print(recipe)
            print(recipe[0])
            getCook(recipe)


# Start the crawl only when this file is executed as a script, so that
# importing the module does not trigger network requests or database writes.
if __name__ == '__main__':
    main()
